clear
set matsize 800
*set type double, permanently
 
*add file path
gl dat " "
gl do " "

******************
*** PETRA data ***
******************

* OBS note, PETRA dont include F+N, have L if they have been in the country at least 1 year(K5=ausweiss L >1 year)
* Statpop includes F+N and date on arrival to Switzerland
* To harmonize only include B+C, and add F using Zemis directly


foreach z in age  all {
forvalues x=1998/2010  {
use "$dat/PETRA/STWB__ZE`x'.dta", clear
g year=`x'
g age=year-geburtsjahr
if "`z'"=="age" keep if age>=18 & age<=65 

drop if (status=="AI"|status=="K5") // AI= residence permit with employment, K5=L>12 months 

ren nation country //same coding as in Zemis 

recode country 997=998 // Without nationality (non-recognized state)/stateless

g east_europe=inlist(country,201,205,220,230,232, 235,238,239,240) | (country>=242 & country<=266)
drop if country>=201 & country<=266 & east_europe!=1 // drop west european countries  to speed up calculations 

ren wohnkanton cant_num

keep cant_num year country 

levelsof country, local(clist)
*display "`clist'"
foreach c of local clist {
bys cant_num: egen c`c'=sum(country==`c') 
}

bys cant_num: g new= _n==1
keep if new==1
drop new country 


if `x'==1998 {
save "$dat/petra_cant_`z'.dta", replace
}
else {
append using "$dat/petra_cant_`z'.dta"
save  "$dat/petra_cant_`z'.dta", replace
}
}
}




********************
*** STATPOP data ***
********************

cap pr drop c
pr de c
	replace cant = `1' if mun>=`2' & mun<=`3' 
end

foreach z in all age {
forvalues x=2011/2014 {

use "$dat/STATPOP/STATPOP_`x'.dta", clear 
drop if inlist(residentpermit, 1,6,7,10,11,12,13,-2,-9 ) //to make it comparable to PETRA 

drop if (residentpermit==5|residentpermit==8) //F + N

ren nationalitystate country
recode country (-9=999) (-6 -1=998) //No indication =999,  Not attributable according to current borders+Stateless=998
replace country =mod(country,1000) if country!=999 & country!=998
drop if country==100 //Swiss


g east_europe=inlist(country,201,205,220,230,232, 235,238,239,240) | (country>=242 & country<=266)
drop if country>=201 & country<=266 & east_europe!=1 // drop west european countries  to speed up calculations 


g year=`x'
g byear=regexs(0) if regexm(dateofbirth,"^[0-9]+")
destring byear, replace
g age=year-byear
if "`z'"=="age" keep if age>=18 & age<=65

ren reportingmunicipalityid mun
g cant_num=.
c 1 1 298 
c 2 301 996 
c 3 1001 1151 
c 4 1201 1220 
c 5 1301 1375
c 6 1401 1407
c 7 1501 1511
c 8 1601 1632 
c 9 1701 1711
c 10 2001 2338 
c 11 2401 2622
c 12 2701 2703
c 13 2761 2895
c 14 2901 2974
c 15 3001 3038
c 16 3101 3111
c 17 3201 3444
c 18 3501 3987
c 19 4001 4323
c 20 4401 4951
c 21 5001 5398 
c 22 5401 5939
c 23 6001 6300
c 24 6401 6512 
c 25 6601 6645
c 26 6701 6810


keep country cant_num year 

levelsof country, local(clist)
foreach c of local clist {
qui bys cant_num: egen c`c'=sum(country==`c') 
}

bys cant_num: g new= _n==1
keep if new==1
drop new country 

if `x'==2011 {
save "$dat/statpop_cant_`z'.dta", replace
}
else {
append using "$dat/statpop_cant_`z'.dta"
save  "$dat/statpop_cant_`z'.dta", replace
}
}
}




***********************
**** population data***
***********************

forvalues x=1998/2015{
import excel "$dat/canton_data/municipalities_pop.xls", sheet("`x'") clear
if `x'<2000 {
keep K A
ren K pop
}
else {
keep J A
ren J pop
}
ren A cant
drop if mi(pop)|mi(cant)
g year=`x'
duplicates drop
replace cant=subinstr(cant,"-","",.)
replace cant=subinstr(cant,"Kanton","",.)
replace cant=subinstr(cant,"Cantone del","",.)
replace cant=subinstr(cant,"Canton de","",.)
replace cant=subinstr(cant,"Canton du","",.)
replace cant=trim(cant)
qui do "$do/cant_num.do"
		
keep if !mi(cant_num)
ren cant cant_res
destring pop, replace
if `x'==1998 {
save "$dat/pop_all.dta", replace
}
else {
append using "$dat/pop_all.dta"
save  "$dat/pop_all.dta", replace
}
}


************************
**  Unemployment data **
************************

forvalues x=1994/2015 {
if `x'<=1999 {
import excel "$dat/canton_data/kantquot_1994_1999.xls", sheet("`x'") cellrange(A4:J41) firstrow clear
keep A J
ren J unemp`x'
}

if `x'>=2000 & `x'<=2009 {
import excel "$dat/canton_data/kantquot_2000_2015.xls", sheet("`x'") cellrange(A4:J41) firstrow clear
keep A J
ren J unemp`x'
}
if `x'>=2010 {
import excel "$dat/canton_data/kantquot_2000_2015.xls", sheet("`x'") cellrange(A4:R41) firstrow clear
keep A R
ren R unemp`x'
}

drop if mi(unemp`x')
ren A cant
replace cant=subinstr(cant,"1)","",.)
replace cant=subinstr(cant,"2)","",.)
replace cant=trim(cant)
drop if inlist(cant, "Total:","Deutsche Schweiz","Westschweiz + Tessin")
if `x'==1994 save "$dat/unemp.dta", replace
else {
merge 1:1 cant using "$dat/unemp.dta"
drop _merge
save "$dat/unemp.dta", replace
}
}

qui do "$do/cant_num.do"
ren cant cant_res
reshape long unemp , i(cant_num) j(year)
save "$dat/unemp.dta", replace


*******************
****  Merge     ***
*******************
clear all

foreach q in age all {
use "$dat/petra_cant_`q'.dta", clear
append using "$dat/statpop_cant_`q'.dta"


merge 1:1 cant_num year using  "$dat/pop_all.dta", keepusing(pop)
drop if _merge==2
drop _merge


merge 1:1 cant_num year using  "$dat/unemp.dta", keepusing(unemp)
drop if _merge==2
drop _merge


order _all, alphabetic

/* check that countries with missing are correct
foreach x of varlist c201-c999 {
egen num_`x'=sum(`x'==.)
if num_`x'==0 drop num_`x'  
}
drop num_*
*/


*value can be missing if no one in any canton from given country, should be zero if at least one canton had values in given year
foreach x of varlist c201-c999 {
replace `x'=0 if mi(`x')
}

*merge some countries where coding changes over time or missing specific country in Zemis
replace c508=c508+c509+c543 // Hongkong + Tibet ---> China 
drop c509  c543 

cap replace c350=c350+c363 // South Sudan --> Sudan
cap drop c363 

replace c248=c248+c220+c254+c256+c249 //Montenegro, Kosovo, Serbia and Montenegro --> Serbia,  Serbia & Montenegro missing 1998, but 220=ex-jugoslavia, and there is info for all other ex-jugoslavia Croatia=250 Slovenia=251 Bosnia and Herzegovina=252 Macedonia=255
drop c220 c254 c256  c249 


* create continent variables
order _all, alphabetic
foreach x of varlist c201 c205 c230 c232 c235 c238-c240 c242-c266 {
if "`x'"=="c201" g eeurope=`x' 
else replace eeurope=eeurope+`x' if !mi(`x') 
}

foreach x of varlist c301-c372 {
if "`x'"=="c301" g africa=`x' 
else replace africa=africa+`x' if !mi(`x') 
}

foreach x of varlist c401-c438 /*then US and island*/ {
if "`x'"=="c401" g samerica=`x' 
else replace samerica=samerica+`x' if !mi(`x')
}

foreach x of varlist c501-c567 {
if "`x'"=="c501" g asia=`x' 
else replace asia=asia+`x' if !mi(`x') 
}

*600- Australia, New Zeland etc

g mig=eeurope+africa+samerica+asia+c998

drop c998 c999 // stateless or missing info 

g cant=""
loc j 0
foreach z in ZH BE LU UR SZ OW NW GL ZG FR SO BS BL SH AR AI SG GR AG TG TI VD VS NE GE JU {
loc ++j 
replace cant="`z'" if cant_num==`j'
}


replace year= year+1 // use network previous year


/* check that number of immigrants in cantons looks smooth over time
order _all, alphabetic
g all=0
foreach x of varlist c201-c999 {
replace all=all+`x'  
}

g high=1 if inlist(cant_num, 1,2,3,17,19,22,25) //ZH BE SG LU AG VD GE
replace high=0 if inlist(cant_num,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,21,23,24,26)
twoway connect all year if high==1, by(cant_num) sort ylabel(, angle(horizontal)) by(,graphregion(fcolor(white)))
twoway connect all year if high==0, by(cant_num) sort ylabel(, angle(horizontal)) by(,graphregion(fcolor(white)))
drop high all
*/

drop cant_num 
ren unemp unemp_ass
ren pop pop_ass
ren cant cantass
ren year fyear

save "$dat/cant_`q'.dta", replace

}




















